# -*- coding: UTF-8 -*-
# Project : bio_tc
# File : qixinSpider.py
# IDE : PyCharm
# Author : 博科（鑫贝西）田聪
# Date : 2021/11/30 15:47
# 不需要进详情页，不需要使用redis去重
import datetime
import random
import re
import time
import json
import requests

from faker import Factory

from urllib.parse import quote

from tools.toMysql import MySqlLink
from tools.toRedis import Redis_DB
from tools.sha import sha_1


class QiXinSpider:
    """Crawler for winning-bid announcements served by zhaobiao.qixin.com.

    ``start_request`` is the entry point: it warms up a cookie-carrying
    session through ``index_page``, reads the search keywords from MySQL and
    runs ``parse`` for each keyword.  ``parse`` walks the paginated search
    API and stores one row per (announcement, winning company) through
    ``self.mysql.insert_item``.  Only the list API is used; no detail page is
    requested.
    """

    # Shared Redis client.  Nothing in this class reads it; it is kept so that
    # existing references to ``QiXinSpider.r`` keep working.
    r = Redis_DB().r

    INDEX_URL = 'https://www.qixin.com/'
    SEARCH_URL = 'https://zhaobiao.qixin.com/api/bidding/searchAllBidding'
    PAGE_SIZE = 20  # records requested per search call ("hit")
    REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever
    _TAG_RE = re.compile(r'<.*?>')  # markup embedded in announcement titles

    def __init__(self):
        self.faker = Factory.create()  # source of random User-Agent strings

        self.mysql = MySqlLink()
        self.new_timestamp = datetime.datetime.now()
        self.session = requests.Session()

    def index_page(self):
        """Visit the home page so the session picks up its cookies.

        Returns:
            True when the page answered 200 and the session now holds an
            ``acw_tc`` cookie; False otherwise (including network errors).
        """
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'user-agent': self.faker.user_agent()
        }
        try:
            response = self.session.get(
                url=self.INDEX_URL,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f'******* 网站初始化“失败” ******* {exc!r}')
            return False

        if response.status_code != 200:
            print('******* 网站初始化“失败” *******')
            return False
        print('\r\n======= 网站初始化完成 =======')

        if not self.session.cookies.get('acw_tc'):
            print('******* 网站Cookie获取“失败“ *******')
            return False
        print('======= 网站Cookie获取”完成“ =======')
        return True

    def post_href(self, url, data, search_key):
        """POST ``data`` as a JSON body to ``url`` using the shared session.

        Args:
            url: Endpoint to call.
            data: JSON-serialisable payload.
            search_key: Keyword being searched; only used to build the
                ``Referer`` header.

        Returns:
            The ``requests`` response when the status code is 200.  On any
            other status code, or on a network error/timeout, the dict
            ``{"status": "400", "message": "error"}``.
        """
        headers = {
            'User-Agent': self.faker.user_agent(),
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Accept-Encoding': 'gzip, deflate, br',
            'Content-Type': 'application/json;charset=utf-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://zhaobiao.qixin.com/search?key={}&type=win'.format(quote(search_key)),
            'Connection': 'keep-alive',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-origin',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'TE': 'trailers',
        }
        try:
            response = self.session.post(
                url=url,
                headers=headers,
                data=json.dumps(data),
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Report network failures through the same marker as a bad status.
            print(f"******* 请求“失败”: {exc!r} *******")
            return {"status": "400", "message": "error"}

        if response.status_code == 200:
            return response
        return {"status": "400", "message": "error"}

    def start_request(self):
        """Initialise the session, then crawl every keyword stored in MySQL."""
        if not self.index_page():
            # Cookie initialisation failed; index_page already printed why.
            return

        # 'fa_st_sk' is passed straight to the MySQL helper.
        # NOTE(review): presumably the table/key holding the keywords - confirm.
        keywords = self.mysql.get_search_key('fa_st_sk')
        for search_key in keywords:
            print(f"============= 当前正在爬取: {search_key}")
            time.sleep(5)
            self.parse(search_key=search_key)

    def parse(self, search_key, start_time='2020-11-08', end_time='2021-11-08', start=0, total=1000):
        """Page through the search results for one keyword and store them.

        Pages are fetched in a loop (not by recursion) so that keywords with
        tens of thousands of pages cannot exhaust the interpreter's recursion
        limit.  Paging stops when the next offset reaches the reported total,
        when a page comes back empty, or when a request fails or the API
        answers with a non-success status.

        Args:
            search_key: Keyword to search for.
            start_time: Earliest publish date, ``YYYY-MM-DD``.
            end_time: Latest publish date, ``YYYY-MM-DD``.
            start: Offset of the first record to request.
            total: Assumed number of results; replaced by the ``total`` field
                of each successful response that carries a usable value.

        Returns:
            None.  Rows are written through ``self.mysql.insert_item``.
        """
        while True:
            response = self.post_href(
                url=self.SEARCH_URL,
                data=self._search_payload(search_key, start_time, end_time, start),
                search_key=search_key,
            )
            data = self._extract_data(response)
            if data is None:
                print(f"******* 爬取 {search_key} “失败”, start={start} *******")
                return

            try:
                total = int(data.get('total'))
            except (TypeError, ValueError):
                pass  # no usable total in this response; keep the previous one

            records = data.get('items') or []
            if not records:
                # An empty page means there is nothing further to read.
                return

            for record in records:
                try:
                    rows = self._build_rows(record, search_key)
                except (AttributeError, KeyError, TypeError, ValueError) as exc:
                    # One malformed record must not abort the whole crawl.
                    print(f"******* 跳过异常记录: {exc!r} *******")
                    continue
                for row in rows:
                    self.mysql.insert_item(row)

            start += self.PAGE_SIZE
            if start >= total:
                return
            time.sleep(random.randint(10, 20))
            print(f"爬取 {search_key} 下一页")

    def _search_payload(self, search_key, start_time, end_time, start):
        """Build the JSON body for one page of the search API."""
        return {
            "searchMode": "keyword",
            "searchFieldsLimit": "",
            "pubDateFrom": start_time,  # start date
            "pubDateTo": end_time,  # end date
            "provinceCodes": [],
            "cityCodes": [],
            "industryCode": "",
            "noticeType": "30",
            "noticeTypeSub": "",
            "budgetMoneyFrom": "",
            "budgetMoneyTo": "",
            "winBidMoneyFrom": "",
            "winBidMoneyTo": "",
            # win_bidding_search: winning-bid notices; bidding_search: tender notices
            "searchType": "win_bidding_search",
            "keyword": search_key,  # search text
            "currentType": "win",
            "undefined": "",
            "start": start,  # record offset, advanced by PAGE_SIZE per page
            "hit": self.PAGE_SIZE
        }

    @staticmethod
    def _extract_data(response):
        """Return the ``data`` dict of a successful search response, else None.

        Accepts either a ``requests`` response or the error dict produced by
        ``post_href``.  A successful body looks like::

            {"status": "1", "message": "...",
             "data": {"total": 434230, "hasNextPage": true,
                      "items": [{"title": ..., "province": ..., "city": ...,
                                 "publishTime": "2021-11-08",
                                 "jumpArgs": {"id": ..., "year": "2021"},
                                 "proprietorCompany": [{"name": ...}, ...],
                                 "winnerCompany": [{"name": ...}, ...],
                                 ...}]}}
        """
        if isinstance(response, dict):
            body = response
        else:
            try:
                body = response.json()
            except ValueError:
                # Body was not JSON (e.g. an HTML page was served instead).
                return None

        if not isinstance(body, dict) or body.get('status') not in (1, '1'):
            return None
        data = body.get('data')
        return data if isinstance(data, dict) else None

    def _build_rows(self, record, search_key):
        """Turn one announcement into a list of rows, one per winning company.

        Announcements without any winning company yield an empty list.

        Raises:
            KeyError, TypeError, ValueError, AttributeError: when a required
                field is missing or malformed (e.g. an unparsable
                ``publishTime``).
        """
        # Only winning-bid notices that actually list a winner carry this field.
        winners = record.get('winnerCompany') or []
        if not winners:
            return []

        proprietors = record.get('proprietorCompany') or []  # tendering parties
        if proprietors:
            tenderee = '|'.join(company.get('name') or '' for company in proprietors)
        else:
            tenderee = '未取到招标人名称'

        title = self._TAG_RE.sub('', record['title'])
        province = record['province']
        city = record.get('city')  # some municipalities have no city field
        jump_args = record['jumpArgs']
        # NOTE(review): id and year are joined with no separator - confirm this
        # matches the site's detail-page URL scheme.
        href = 'https://zhaobiao.qixin.com/' + jump_args['id'] + jump_args['year']
        # publishTime arrives as "YYYY-MM-DD"; stored as a Unix timestamp.
        pubdate = int(datetime.datetime.strptime(record['publishTime'], '%Y-%m-%d').timestamp())

        rows = []
        for winner in winners:
            bid_winner = winner.get('name')  # winning company name
            # A fresh dict per winner so stored rows never alias each other.
            rows.append({
                'tenderee': tenderee,
                'bid_winner': bid_winner,
                'search_key': search_key,
                'id': sha_1(tenderee, bid_winner, search_key),
                'href': href,
                'web_title': title,
                'province': province,
                'address': city,
                'updatetime': int(datetime.datetime.now().timestamp()),
                'pubdate': pubdate,
                'source': '启信宝',
            })
        return rows


